*** Graph appearance: every figure drawn below uses the s2color scheme
set scheme s2color

*** Load dataset (read from the folder held in the global macro $data; replaces the data in memory)
use "$data\2_firm_regressions_individual_paper.dta", clear
	
	
	* Monthly cost of lifting employees in the two lowest pay brackets to the
	* minimum wage: bracket head count times (target wage - wage assumed for the
	* bracket). Waves 1-2 use a target of 60000, wave 3 a target of 75000.
	* NOTE(review): 17500/47500 and 22500/60000 look like assumed bracket wages - confirm against the questionnaire.
	gen minwagegap=(no_pay_b35k*(60000-17500))+(no_pay_35k60k*(60000-47500)) if inlist(wave,1,2)
	replace minwagegap=(no_pay_b35k*(75000-22500))+(no_pay_35k60k*(75000-60000)) if wave==3

	* Firms with sh_aminwage equal to 1 have a gap of exactly zero in every wave
	replace minwagegap=0 if sh_aminwage==1

	* A zero gap is discarded when the firm reported no pay-bracket head counts at all
	replace minwagegap=. if minwagegap==0 & (no_pay_total==0 | missing(no_pay_total))

	* Rescale in two steps, exactly as before, so stored float values do not change
	replace minwagegap=minwagegap/537.286 // in USD
	replace minwagegap=minwagegap/1000 // in '000 USD

	* Annual figure is twelve times the monthly one
	gen minwagegap_annual=minwagegap*12

	label variable minwagegap "Estimated costs of minimum wage compliance/month ('000 USD)"
	label variable minwagegap_annual "Estimated costs of minimum wage compliance/year ('000 USD)"
		
	
	* Estimated monthly cost of full social-security compliance (wave 1 only).
	* Each pay bracket contributes head count * 0.25 * assumed wage; the total is
	* scaled by the share of staff not yet covered, (1 - sh_sosec).
	* NOTE(review): 0.25 is presumably the contribution rate - confirm.

	* Fully compliant firms have no gap
	gen sosecgap=0 if sh_sosec==1

	* All other firms (sh_sosec in [0,1)) share ONE formula. The open-ended top
	* bracket (above 125k) is valued at 225000, the mean in the individual sample
	* for that category. Previously firms with sh_sosec==0 used 125000 for this
	* bracket while partially compliant firms used 225000, so the estimated cost
	* jumped discontinuously at sh_sosec==0.
	replace sosecgap=(((no_pay_b35k+no_pay_35k60k)*0.25*60000)+(no_pay_60k85k*0.25*72500)+(no_pay_85k125k*0.25*105000)+(no_pay_a125k*0.25*225000))*(1-sh_sosec) if sh_sosec>=0 & sh_sosec<1

	* If we don't have wage info, assume they have to pay for the mean wage
	* (130,000, from the individual sample) for every employee.
	* NOTE(review): this only fires when sosecgap==0; if the bracket counts are
	* missing the formula above yields missing, not 0, and the fallback is skipped - confirm intended.
	replace sosecgap=(no_employees*0.25*130000)*(1-sh_sosec) if sh_sosec<1 & sosecgap==0 & (no_pay_total==0 | missing(no_pay_total))

	* The measure is kept for wave 1 only
	replace sosecgap=. if inrange(wave,2,3)

	replace sosecgap=sosecgap/537.286 // in USD
	replace sosecgap=sosecgap/1000 // in '000 USD
	gen sosecgap_annual=sosecgap*12

	* Labels carry the unit, in line with the minimum-wage gap variables
	la var sosecgap "Estimated costs of social security compliance/month ('000 USD)"
	la var sosecgap_annual "Estimated costs of social security compliance/year ('000 USD)"
	
	
	* Display every variable whose name starts with costs_labour with thousands
	* separators and three decimals (display format only; stored values unchanged)
	format costs_labour* %13.3fc

	* Order the panel by firm and wave, keeping the existing order among ties,
	* then fill gaps in costs_labour_2021 within each firm.
	* NOTE(review): fillmissing is a user-written command; with no with() option it
	* presumably copies any non-missing value of the firm to its missing rows - confirm in its help file.
	sort id wave, stable
	by id: fillmissing(costs_labour_2021)

	
	* Annual minimum-wage gap expressed as a percentage of 2021 labour costs
	gen minwagegap_perc=100*(minwagegap_annual)/costs_labour_2021
	label variable minwagegap_perc "Estimated costs of minimum wage compliance (% of 2021 annual labour costs)"

	* Blank the percentage when
	*   - costs_labour is zero,
	*   - the firm is id 15379 (inconsistent info on labour costs and pay), or
	*   - sh_aminwage is missing.
	* NOTE(review): the zero test uses costs_labour, not costs_labour_2021 (the
	* denominator above) - confirm which variable is intended.
	replace minwagegap_perc=. if costs_labour==0 | id==15379 | sh_aminwage==.

	* Percentage scaled by 0.43
	* NOTE(review): 0.43 is presumably an estimated treatment effect ("te") - confirm its source.
	gen minwagegap_perc_te=(minwagegap_perc*0.43)
	
	
	* Bin minwagegap_perc into categories: 0 = exactly zero, 1-10 = ten-point
	* bands up to 100, 11 = above 100.
	gen minwagegap_perc_cat=0 if minwagegap_perc==0
	forvalues k = 1/10 {
		* Band k runs from 10*(k-1) + 0.0000001 up to and including 10*k; the lower
		* bound is assembled as text so the numeric literal is the one used before
		local lo "`=10*(`k'-1)'.0000001"
		local hi = 10*`k'
		replace minwagegap_perc_cat=`k' if inrange(minwagegap_perc,`lo',`hi')
	}
	* Missing counts as larger than any number in Stata, hence the explicit guard
	replace minwagegap_perc_cat=11 if !missing(minwagegap_perc) & minwagegap_perc>100
	* No category when sh_aminwage is missing
	replace minwagegap_perc_cat=. if sh_aminwage==.

	* Value labels for the categories
	label define minwagegap 0 "0" 1 "0-10" 2 "10-20" 3 "20-30" 4 "30-40" 5 "40-50" 6 "50-60" 7 "60-70" 8 "70-80" 9 "80-90" 10 "90-100" 11 ">100"
	label values minwagegap_perc_cat minwagegap
	
	* Head count in the three pay brackets from 60k upwards
	gen no_aminwage=no_pay_60k85k+no_pay_85k125k+no_pay_a125k 
	
	* Costs of paying the min. wage to all receiving it already
	* (75000 per head in wave 3, 60000 per head in waves 1-2)
	gen minwagecosts=no_aminwage*75000 if wave==3
	replace minwagecosts=no_aminwage*60000 if inlist(wave,1,2)
	replace minwagecosts=minwagecosts/537.286 // in USD
	replace minwagecosts=minwagecosts/1000 // in '000 USD
	gen minwagecosts_annual=minwagecosts*12
	* Label states USD (the values are divided by 537.286 above, like every other
	* USD variable here) and stays within Stata's 80-character limit for variable
	* labels, so the unit is no longer cut off.
	la var minwagecosts_annual "Annual wage costs if all above min. wage got exactly min. wage ('000 USD)"
	
	
	* Wave-1 ratio (in %) of these hypothetical costs to 2021 labour costs ...
	gen minwagecosts_perc_2021=100*(minwagecosts_annual/costs_labour_2021) if wave==1
	sort id wave, stable
	* ... carried forward to the firm's later waves. replace works through the
	* rows in order, so [_n-1] cascades from wave 1 to wave 2 to wave 3 and also
	* reaches wave 3 when a firm has no wave-2 row (fixed offsets of 1 and 2 rows
	* left such firms missing).
	by id: replace minwagecosts_perc_2021=minwagecosts_perc_2021[_n-1] if inlist(wave,2,3) & missing(minwagecosts_perc_2021)
	* Same ratio for waves 2 and 3 using the current-wave costs.
	* NOTE(review): unlike minwagecosts_perc_2021 this is NOT multiplied by 100,
	* i.e. it is a share, not a percentage - confirm before comparing the two.
	gen minwagecosts_perc=minwagecosts_annual/costs_labour_2021 if inlist(wave,2,3)
		
	* Annual social-security gap as a percentage of 2021 labour costs
	gen sosecgap_perc=100*(sosecgap_annual)/costs_labour_2021
	replace sosecgap_perc=. if costs_labour_2021==0 
	replace sosecgap_perc= . if sh_sosec ==.
	la var sosecgap_perc "Estimated costs of social security compliance (% of 2021 labour costs)"
	
	* Bin the percentage: 0 = exactly zero, 1-10 = ten-point bands (lower bound
	* exclusive, upper bound inclusive), 11 = above 100.
	gen sosecgap_perc_cat=0 if sosecgap_perc==0 
	forvalues k = 1/10 {
		* Strict/weak inequalities leave no holes between neighbouring bands
		* (the old 0.0000001 offsets left e.g. 10 < x < 10.0000001 unassigned).
		* A missing percentage fails the <= test, so it is never assigned a band.
		replace sosecgap_perc_cat=`k' if sosecgap_perc>10*(`k'-1) & sosecgap_perc<=10*`k'
	}
	* Category 11 is defined in the value label below but was never assigned, so
	* firms with a gap above 100% ended up with a missing category.
	replace sosecgap_perc_cat=11 if sosecgap_perc>100 & !missing(sosecgap_perc)
	* No category when sh_sosec is missing
	replace sosecgap_perc_cat =. if sh_sosec==.                           
            
	la def sosecgap 0 "0" 1 "0-10" 2 "10-20" 3 "20-30" 4 "30-40" 5 "40-50" 6 "50-60" 7 "60-70" 8 "70-80" 9 "80-90" 10 "90-100" 11 ">100"
	la val sosecgap_perc_cat sosecgap 

	
* Prepare data for the distribution bar chart of compliance gaps:
* keep wave 1 only and blank the gap measures of firms whose compliance share
* is already 1, so they are not counted in the figure.
keep if wave==1

* Compliance-share variable that belongs to each gap measure
local flag_minwagegap sh_aminwage
local flag_sosecgap   sh_sosec

foreach gap in minwagegap sosecgap {
	foreach v of varlist `gap'_perc `gap'_annual `gap' `gap'_perc_cat {
		replace `v'=. if `flag_`gap''==1
	}
}


* Every row counts as one firm
gen n = 1

* --- Minimum wage gap: number of firms per category, stored in a temporary file
preserve
collapse (sum) n_mw = n, by(minwagegap_perc_cat)
rename minwagegap_perc_cat gap
sort gap
* Zero-gap and uncategorised firms are left out before the total is formed
drop if gap==0 | gap==.
egen sum = total(n_mw)
gen percent_minwage = n_mw/sum
* The ">100" category enters the total above but is not plotted
drop if gap==11
tempfile minwage
save `minwage'
restore

* --- Social security gap: same steps, applied to the data left in memory
collapse (sum) n, by(sosecgap_perc_cat)
rename sosecgap_perc_cat gap
sort gap
drop if gap==0 | gap==.
egen sum = total(n)
gen percent_sosec = n/sum
drop if gap==11

* Put both sets of counts side by side, one row per gap category
merge 1:1 gap using `minwage', nogen

* Grouped bars: first bar = social security (n), second bar = minimum wage (n_mw)
graph bar n n_mw, over(gap) ///
    legend(label(1 "Social security") label(2 "Minimum wage")) ///
    ytitle("Number of Firms") ///
    b1title("Gap to be fully compliant as a share of baseline labor costs")
graph export "$results\02_figures\Figure_S1_distribution_gap_mw_sosec.png", as(png) replace

/*export graph for formatted submission
graph bar n n_mw, over(gap) ///
    legend(label(1 "Social security") ///
           label(2 "Minimum wage")) ///
    ytitle("Number of Firms") ///
    b1title("Gap to be fully compliant as a share of baseline labor costs") ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    bar(1, color(gs10)) ///
    bar(2, color(gs4))
graph export "$results\02_figures\Figure_S1_distribution_gap_mw_sosec_blackwhite.tif", as(tif) replace
graph export "$results\02_figures\Figure_S1_distribution_gap_mw_sosec_blackwhite.png", as(png) replace 

